package com.tl.spark.scala

import java.io.ByteArrayInputStream
import java.nio.file.{Files, Paths}
import java.util.Collections

import org.dom4j.Document
import org.dom4j.io.SAXReader
import org.dom4j.xpath.DefaultXPath

import scala.collection.JavaConversions._

/**
  * Stand-alone demo that parses a patent XML document with dom4j and prints a
  * few values selected through XPath.
  *
  * Project: spark-test. Created: 2018-09-14 15:25.
  *
  * Earlier experiments that lived here as commented-out code (loading the file
  * with `scala.xml` through a `SAXParserFactory` that disabled DTD loading, and
  * a union XPath that additionally matched `business:DesignBibliographicData`)
  * were removed to keep the entry point readable.
  *
  * @author dong.tl
  */
object ParseXML {

  /** Sample document that is parsed when no path is supplied on the command line. */
  private val DefaultXmlPath: String =
    "F:\\data\\CN102003000040024CN0000001238422C9FULZH20170329CN00W.XML"

  /**
    * XPath selecting the text node(s) of `base:WIPOST3Code` below the
    * `business:PublicationReference` element whose `dataFormat` attribute is
    * `standard`.
    */
  private val Wipost3CodeXPath: String =
    "/business:PatentDocumentAndRelated/business:BibliographicData/business:PublicationReference[@dataFormat='standard']/base:DocumentID/base:WIPOST3Code/text()"

  /** XPath selecting the `dateProduced` attribute of the document's root element. */
  private val DateProducedXPath: String =
    "/business:PatentDocumentAndRelated/@dateProduced"

  /**
    * Reads the XML file, prints the string value and the XML form of every node
    * matched by [[Wipost3CodeXPath]], then prints the value returned for
    * [[DateProducedXPath]] prefixed with `--`.
    *
    * @param args optional; when the first argument is present and non-empty it
    *             is used as the path of the XML file, otherwise
    *             [[DefaultXmlPath]] is used
    */
  def main(args: Array[String]): Unit = {
    // Explicit Java -> Scala conversion (`.asScala`) instead of relying on the
    // deprecated implicit views from `scala.collection.JavaConversions`.
    import scala.collection.JavaConverters._

    val xmlPath = args.headOption.filter(_.nonEmpty).getOrElse(DefaultXmlPath)

    // The whole file is loaded into memory and handed to the reader as a stream,
    // exactly as the original code did.
    val reader = new SAXReader
    val xmlBytes = Files.readAllBytes(Paths.get(xmlPath))
    val doc: Document = reader.read(new ByteArrayInputStream(xmlBytes))

    val nodes = doc.selectNodes(Wipost3CodeXPath)
    nodes.asScala.foreach { node =>
      println(node.getStringValue)
      println(node.asXML())
    }

    // `util.getStringValue` is not defined in this file; presumably a project
    // helper that evaluates the XPath against the document - verify there.
    println("--" + util.getStringValue(doc, DateProducedXPath))
  }
}
